/*
Construct a panel of entrepreneurs, and identify when (if ever) they have
their first child.

Usage:
  do setup_panel.do <min_ownership>
  min_ownership = 33  -->  main sample (1/3+ ownership)
  min_ownership = 25  -->  robustness sample (25%+ ownership)

Uses:
data/admin/raw/firm_ownership_01_19.dta
data/admin/intermediate/first_births_to_2019.dta [created in identify_births.do]
data/admin/raw/bs9519.dta
data/admin/raw/ee_extended_9518_lfirm.dta

data/admin/raw/constant_traits.dta (or constant_traits_updateOct2023.dta for 25pct)
data/admin/raw/cpi_1960_2020_USD.dta
data/admin/raw/education_nus2000.dta
data/admin/raw/marital_cohabit_91_20.dta
data/admin/raw/increg_93_19.dta
data/admin/intermediate/live_near_parents.dta

Creates (min_ownership = 33):
data/admin/intermediate/payment_to_owners_9618.dta
data/admin/intermediate/penalty_cohort_final.dta

Creates (min_ownership = 25):
data/admin/intermediate/payment_to_owners_9618_25p.dta
data/admin/intermediate/penalty_cohort_final_25p.dta
*/

* Single positional argument: the ownership threshold (33 or 25) that selects
* which sample is built and which files are written.
args min_ownership

* All data paths below are relative to the directory held in global root.
cd "$root"

* ---------------------------------------------------------------------------
* Validate argument and set parameters
* ---------------------------------------------------------------------------

if "`min_ownership'" == "" {
    di as error "Usage: do setup_panel.do <min_ownership>  (33 or 25)"
    exit 198
}

* Reject non-numeric input up front. Without this guard the numeric
* comparisons below abort with an unrelated "not found" error rather than
* the intended message.
capture confirm number `min_ownership'
if _rc {
    di as error "min_ownership must be 33 or 25"
    exit 198
}

* max_cat is the largest owner_cat value treated as an owner in the filters
* used throughout this file; the remaining locals are input/output paths.
if `min_ownership' == 33 {
    local max_cat      3
    local payment_file data/admin/intermediate/payment_to_owners_9618.dta
    local output_file  data/admin/intermediate/penalty_cohort_final.dta
    local traits_file  data/admin/raw/constant_traits.dta
}
else if `min_ownership' == 25 {
    local max_cat      4
    local payment_file data/admin/intermediate/payment_to_owners_9618_25p.dta
    local output_file  data/admin/intermediate/penalty_cohort_final_25p.dta
    local traits_file  data/admin/raw/constant_traits_updateOct2023.dta
}
else {
    di as error "min_ownership must be 33 or 25"
    exit 198
}

di "Building panel: min_ownership = `min_ownership'%, max owner_cat = `max_cat'"

* ---------------------------------------------------------------------------
* Identify how much firms paid all of their owners
* ---------------------------------------------------------------------------

* ownership spell (first and last observed year) for each owner-firm pair
use data/admin/raw/firm_ownership_01_19.dta, clear
keep if inrange(owner_cat, 1, `max_cat')
collapse (min) first_year_own = year ///
         (max) last_year_own  = year, by(lnr lfirm)

* spells that start in 2001 are back-dated to 1995 (inferred)
replace first_year_own = 1995 if first_year_own == 2001

* attach the pair's employment records and keep only years inside the spell
merge 1:m lnr lfirm using data/admin/raw/ee_extended_9518_lfirm.dta, ///
    keep(match) nogenerate
drop if year < first_year_own | year > last_year_own

* total k_lonn paid to owners, by firm-year
collapse (sum) owner_salaries = k_lonn, by(lfirm year)
save `payment_file', replace

* ---------------------------------------------------------------------------
* Identify whether an individual owns a firm
* ---------------------------------------------------------------------------

use data/admin/raw/firm_ownership_01_19.dta, clear
keep if inrange(owner_cat, 1, `max_cat')

* restrict to firm-years that are present in the balance-sheet file
merge m:1 lfirm year using data/admin/raw/bs9519.dta, ///
    keep(match) nogenerate

* first and last year each person is observed owning each firm
* NOTE: this is only up to 2019, so a spell whose last year is 2019 gets a
* missing end year
collapse (min) first_year_owned = year ///
         (max) last_year_owned  = year, by(lnr lfirm)
replace last_year_owned = . if last_year_owned == 2019

* bring back the ownership-file variables (incl. industry) as of the first
* owned year; the helper year variable is removed afterwards
gen year = first_year_owned
merge 1:1 lnr lfirm year using data/admin/raw/firm_ownership_01_19.dta, ///
    keep(match) nogenerate
drop year

tempfile maj_ownership
save `maj_ownership'

* ---------------------------------------------------------------------------
* Construct a panel of firm owners w/ income, firm, and fertility info
* ---------------------------------------------------------------------------

* one row per owner-firm pair
use `maj_ownership', clear
keep lnr lfirm
duplicates drop

* timing of first birth (owners without a match are kept)
merge m:1 lnr using data/admin/intermediate/first_births_to_2019.dta, ///
    keep(master match) nogenerate

* gender and birth year; owners absent from the traits file are dropped
merge m:1 lnr using `traits_file', ///
    keepusing(lnr male b_year) keep(match) nogenerate

* 25 copies of each pair, one per calendar year 1995-2019
expand 25
bys lnr lfirm : gen year = 1994 + _n
gen age = year - b_year // individual age

* firm balance-sheet variables for the firm-year
merge m:1 lfirm year using data/admin/raw/bs9519.dta, ///
    keep(master match) nogenerate

* salary paid by the firm to its owners
merge m:1 lfirm year using `payment_file', ///
    keep(master match) nogenerate

* merge on total number of workers (including owners)
tempfile employ

preserve
use data/admin/raw/ee_extended_9518_lfirm.dta, clear
keep year lnr lfirm manager

* full outer join with the ownership file, so owners without an employment
* record are counted as well
merge 1:1 lnr lfirm year using data/admin/raw/firm_ownership_01_19.dta, ///
    keepusing(lnr lfirm year owner_cat) nogenerate

* discard owner_cat == 0 rows and anything after 2018
drop if owner_cat == 0 | year > 2018

* 1 for everyone who is not an owner under the current threshold
gen n_emp_excl_owners = !inrange(owner_cat, 1, `max_cat')

collapse (count) n_workers_total = lnr ///
         (sum)   n_emp_excl_owners, by(lfirm year)
save `employ', replace
restore

merge m:1 lfirm year using `employ', keep(master match) nogenerate

* get share owned in the firm
merge 1:1 lnr lfirm year using data/admin/raw/firm_ownership_01_19.dta, ///
    keepusing(lnr lfirm year ownership_pct owner_cat firm_byear) ///
    keep(master match) nogenerate

* within 2001-2019, a missing value is set to zero (no recorded stake)
foreach v of varlist ownership_pct owner_cat {
    replace `v' = 0 if `v' == . & year >= 2001 & year <= 2019
}

* get sector as of the firm's first year in the ownership file
* (i.e. max(2001, firm birthyear) per the original note)
tempfile sectors

preserve
use data/admin/raw/firm_ownership_01_19.dta, clear
egen first_year = min(year), by(lfirm)
keep if year == first_year
keep lfirm sic_code sic_2digit ind_cat
duplicates drop
save `sectors'
restore

merge m:1 lfirm using `sectors', keep(master match) nogenerate

* numeric, value-labelled version of the industry category
encode ind_cat, gen(ind_cat2)

* drop variables that we aren't using
drop profit_before_tax total_current_assets debt_lt debt_st depreciation
drop equity net_profit dividends total_fixed_assets cogs

* convert to real terms (2015 USD)
merge m:1 year using data/admin/raw/cpi_1960_2020_USD.dta, nogenerate
drop if lnr == . // CPI years with no panel rows
drop cpi_orig pgrowth
replace cpi = cpi * 1.0612 // 2011 to 2015 USD

* balance-sheet items to deflate; salaries is included for the main sample only
local nominal_vars revenues operating_costs operating_profits ///
                   owner_salaries total_assets
if `min_ownership' == 33 {
    local nominal_vars `nominal_vars' salaries
}
foreach v of varlist `nominal_vars' {
    replace `v' = `v' / cpi
}

* profits and costs before the owners' own pay; missing owner pay counts as
* zero through 2018
replace owner_salaries = 0 if owner_salaries == . & year <= 2018
gen profit_before_self = operating_profits + owner_salaries
gen costs_before_self = operating_costs - owner_salaries

* value added (main sample only): missing or negative salaries are set to zero
if `min_ownership' == 33 {
    replace salaries = 0 if salaries == . | salaries < 0
    gen value_added = operating_profits + salaries
}

* identify firm age
* firm_byear is only present on some rows; spread the firm's maximum
* non-missing value to all of its rows
tempvar byear_filled
bys lfirm : egen `byear_filled' = max(firm_byear)
replace firm_byear = `byear_filled'
drop `byear_filled'

* keep only years from the firm's birth year onward with a known birth year
gen firm_age = year - firm_byear if year >= firm_byear
drop if firm_age < 0 | firm_age == .

* construct indicator for currently owning the firm in question
* (left missing where ownership_pct is missing)
gen currently_own = inrange(owner_cat, 1, `max_cat') if ownership_pct != .

* merge on education
merge m:1 lnr using data/admin/raw/education_nus2000.dta, ///
    keep(master match) nogenerate

* identify if firm was sold
* Step 1: flag firm-years in which the firm has at least one owner who meets
* the current ownership threshold.
preserve
tempfile any_owners
use data/admin/raw/firm_ownership_01_19.dta, clear
keep if owner_cat >= 1 & owner_cat <= `max_cat'
keep lfirm year
duplicates drop
gen any_maj_owner_lfirm = 1
save `any_owners'
restore

merge m:1 lfirm year using `any_owners', keep(1 3)
replace any_maj_owner_lfirm = 0 if _merge == 1

* Step 2: previous-row and next-row ownership status within each owner-firm
* pair. Ordering by year is stated explicitly in the by-prefix so the lag and
* lead never depend on whatever sort order happens to be in memory.
bys lnr lfirm (year) : gen lag_own = currently_own[_n-1]
bys lnr lfirm (year) : gen lead_own = currently_own[_n+1]

* Step 3: sold = owned on the previous row, not owned on this row or the
* next, while the firm still has some qualifying owner this year. Left
* missing when the next row's status is unknown.
gen sold_firm = (currently_own == 0 & lag_own == 1 & ///
    any_maj_owner_lfirm == 1 & lead_own == 0) if lead_own != .
drop lag_own lead_own any_maj_owner_lfirm _merge

* sold firm hazard rate: Pr[sell in t | did not sell before t]
* hence after the firm is sold it drops out of the sample
* year_sold is the first year with sold_firm == 1, or 9999 if never sold.
bys lnr lfirm : egen year_sold = min(year * sold_firm + 9999 * (1 - sold_firm))
gen sold_firm_hazard = sold_firm if year_sold == 9999 | year <= year_sold

* identify whether t+1 partner was working in period t
tempfile employed marital

preserve

* person-years with at least one employment record, 2001-2018
use lnr year if inrange(year, 2001, 2018) ///
    using data/admin/raw/ee_extended_9518_lfirm.dta, clear
duplicates drop
rename lnr spousal_lnr
gen partner_employed = 1
save `employed'

* partner links for marital codes 0 and 2, 2001-2019
* NOTE(review): the meaning of codes 0 and 2 is not visible here -- confirm
use data/admin/raw/marital_cohabit_91_20.dta, clear
keep if inlist(marital, 0, 2) & inrange(year, 2001, 2019)
keep year lnr spousal_lnr

* shift back one year so the partner observed in t+1 lines up with year t
replace year = year - 1
merge m:1 spousal_lnr year using `employed', ///
    keep(master match) nogenerate
replace partner_employed = 0 if partner_employed == .
keep year lnr partner_employed
save `marital'

restore

merge m:1 lnr year using `marital', keep(master match) nogenerate

* identify whether t+1 partner owned stock in or received a salary from the firm
* Result: partner_at_firm (1/0/missing) at the lnr * lfirm * year level.
tempfile marital_small
tempfile partner

* Partner links, shifted back one year so that the partner observed in t+1 is
* attached to year t.
* NOTE(review): this window starts in 2005, whereas the partner-employment
* step above uses 2001 -- confirm that the difference is intended.
preserve
use data/admin/raw/marital_cohabit_91_20.dta, clear
keep if inlist(marital, 0, 2) & inrange(year, 2005, 2019)
keep year lnr spousal_lnr
replace year = year - 1
save `marital_small'
restore

preserve
keep lnr lfirm year
merge m:1 lnr year using `marital_small', keep(1 3) nogen
* Temporarily put the partner's id in lnr so the partner's own employment and
* ownership records at this firm can be looked up with the usual merge keys.
ren (lnr spousal_lnr) (orig_lnr lnr)
merge m:1 lnr lfirm year using data/admin/raw/ee_extended_9518_lfirm.dta, keep(1 3) nogen
merge m:1 lnr lfirm year using data/admin/raw/firm_ownership_01_19.dta, keep(1 3) nogen
* Restore the original ids.
ren (orig_lnr lnr) (lnr spousal_lnr)
* 1 if the partner has positive k_lonn from the firm or a positive ownership
* share in it; 0 for 2001-2018 rows with a known partner and neither; missing
* otherwise (no partner linked, or year outside 2001-2018).
gen partner_at_firm = 1 if k_lonn > 0 & k_lonn != . & spousal_lnr != .
replace partner_at_firm = 1 if ownership_pct > 0 & ownership_pct != . & spousal_lnr != .
replace partner_at_firm = 0 if partner_at_firm == . & inrange(year, 2001, 2018) & spousal_lnr != .

keep lnr lfirm year partner_at_firm
save `partner'
restore

merge 1:1 lnr lfirm year using `partner', keep(1 3) nogen

* identify cohorts
* cohort is set in the year before the first birth (so it equals
* year_first_child), and only if in that year the person owns the firm,
* operating_profits is observed, and the firm is at least one year old.
gen cohort = year + 1 if (year == year_first_child - 1) & ///
    currently_own == 1 & (operating_profits != .) & (firm_age >= 1)

* spread the cohort to every row of the owner-firm pair; 0 = never treated
bys lnr lfirm : egen cohort_id = max(cohort)
replace cohort_id = 0 if cohort_id == .

* remove untreated co-owners of treated firms
* min_cohort_id is the earliest positive cohort among the firm's owners, or
* 9999 if none is treated. The full variable name is used in the keep
* condition so it does not rely on variable abbreviation being switched on.
bys lfirm : egen min_cohort_id = min(cohort_id * (cohort_id > 0) + 9999 * (cohort_id == 0))
keep if (min_cohort_id == cohort_id) | min_cohort_id == 9999

* remove treated men whose wives work at the firm
* ever_partner = 1 if partner_at_firm is 1 in any of the three years
* cohort_id - 2 through cohort_id.
replace partner_at_firm = 0 if partner_at_firm == .
bys lnr lfirm : ///
    egen ever_partner = max((year <= cohort_id) * (year >= cohort_id - 2) * partner_at_firm)
drop if ever_partner == 1 & male == 1

* augment with information on lfirm * lnr salaries
merge 1:1 lnr lfirm year using data/admin/raw/ee_extended_9518_lfirm.dta, ///
    keepusing(lnr lfirm year k_lonn) keep(master match) nogenerate
replace k_lonn = 0 if k_lonn == . & year <= 2018
rename k_lonn lnr_lfirm_salary

* augment with information on total income
merge m:1 lnr year using data/admin/raw/increg_93_19.dta, ///
    keepusing(lnr year salary) keep(master match) nogenerate
replace salary = 0 if salary == .
rename salary lnr_salary

* identify benefits, sick leave paid
* this is the difference between EE salaries and salary
tempfile ee_wages

preserve
keep lnr year
drop if year > 2018
duplicates drop

* sum k_lonn over all of the person's employment records in the year
merge 1:m lnr year using data/admin/raw/ee_extended_9518_lfirm.dta, ///
    keep(master match) nogenerate
replace k_lonn = 0 if k_lonn == .
collapse (sum) ee_salary = k_lonn, by(lnr year)
save `ee_wages'
restore

merge m:1 lnr year using `ee_wages', keep(master match) nogenerate

* add the gap between registered salary and summed EE pay to the salary
* drawn from this firm
gen taxable_benefit = lnr_salary - ee_salary
gen lnr_lfirm_salary_w_benefit = lnr_lfirm_salary + taxable_benefit
drop ee_salary taxable_benefit

* generate wage earnings and deflate salary variables (main sample only)
if `min_ownership' == 33 {
    gen wage_earnings = lnr_salary - lnr_lfirm_salary_w_benefit
    local pay_vars wage_earnings lnr_salary lnr_lfirm_salary

    * convert to real terms
    foreach v of varlist `pay_vars' {
        replace `v' = `v' / cpi
    }
    * negative amounts are set to zero
    foreach v of varlist `pay_vars' {
        replace `v' = 0 if `v' < 0
    }
}

* live near grandparents when baby was born?
merge m:1 lnr year using data/admin/intermediate/live_near_parents.dta, ///
    keep(master match) nogenerate
replace live_near_parents = 0 if live_near_parents == .

* now have data on LNR * firm * year level
sort lnr lfirm year
save `output_file', replace
